Skip to content

Conversation

@arsenm
Copy link
Contributor

@arsenm arsenm commented May 20, 2025

Failures appeared after #140587 but this case wasn't covered

Copy link
Contributor Author

arsenm commented May 20, 2025

This stack of pull requests is managed by Graphite. Learn more about stacking.

@llvmbot
Copy link
Member

llvmbot commented May 20, 2025

@llvm/pr-subscribers-backend-amdgpu

Author: Matt Arsenault (arsenm)

Changes

Full diff: https://github.com/llvm/llvm-project/pull/140784.diff

1 Files Affected:

  • (modified) llvm/test/CodeGen/AMDGPU/frame-index-elimination.ll (+111)
diff --git a/llvm/test/CodeGen/AMDGPU/frame-index-elimination.ll b/llvm/test/CodeGen/AMDGPU/frame-index-elimination.ll
index ee62359cffc63..aea6329d56885 100644
--- a/llvm/test/CodeGen/AMDGPU/frame-index-elimination.ll
+++ b/llvm/test/CodeGen/AMDGPU/frame-index-elimination.ll
@@ -424,4 +424,115 @@ vector.body.i.i.i.i:                              ; preds = %.shuffle.then.i.i.i
   ret void
 }
 
+; Check that we do not produce a verifier error after prolog/epilog
+; insertion. %alloca1 and %alloca2 will both lower to literals.
+
+; GCN-LABEL: {{^}}s_multiple_frame_indexes_literal_offsets:
+; GCN: s_load_dword [[ARG0:s[0-9]+]]
+; GCN: s_movk_i32 [[ALLOCA1:s[0-9]+]], 0x44
+; GCN: s_cmp_eq_u32 [[ARG0]], 0
+; GCN: s_cselect_b32 [[SELECT:s[0-9]+]], [[ALLOCA1]], 0x48
+; GCN: s_mov_b32 [[ALLOCA0:s[0-9]+]], 0{{$}}
+; GCN: ; use [[SELECT]], [[ALLOCA0]]
+define amdgpu_kernel void @s_multiple_frame_indexes_literal_offsets(i32 inreg %arg0) #0 {
+  %alloca0 = alloca [17 x i32], align 8, addrspace(5) ; 68 bytes, expected at offset 0
+  %alloca1 = alloca i32, align 4, addrspace(5) ; expected at offset 0x44 (literal)
+  %alloca2 = alloca i32, align 4, addrspace(5) ; expected at offset 0x48 (literal)
+  %cmp = icmp eq i32 %arg0, 0
+  %select = select i1 %cmp, ptr addrspace(5) %alloca1, ptr addrspace(5) %alloca2
+  call void asm sideeffect "; use $0, $1","s,s"(ptr addrspace(5) %select, ptr addrspace(5) %alloca0)
+  ret void
+}
+
+; One of %alloca1 and %alloca2 will lower to an inline constant and the other
+; will be a literal, so we could fold both indexes into the instruction.
+
+; GCN-LABEL: {{^}}s_multiple_frame_indexes_one_imm_one_literal_offset:
+; GCN: s_load_dword [[ARG0:s[0-9]+]]
+; GCN: s_mov_b32 [[ALLOCA1:s[0-9]+]], 64
+; GCN: s_cmp_eq_u32 [[ARG0]], 0
+; GCN: s_cselect_b32 [[SELECT:s[0-9]+]], [[ALLOCA1]], 0x44
+; GCN: s_mov_b32 [[ALLOCA0:s[0-9]+]], 0{{$}}
+; GCN: ; use [[SELECT]], [[ALLOCA0]]
+define amdgpu_kernel void @s_multiple_frame_indexes_one_imm_one_literal_offset(i32 inreg %arg0) #0 {
+  %alloca0 = alloca [16 x i32], align 8, addrspace(5) ; 64 bytes, expected at offset 0
+  %alloca1 = alloca i32, align 4, addrspace(5) ; expected at offset 64 (inline constant)
+  %alloca2 = alloca i32, align 4, addrspace(5) ; expected at offset 0x44 (literal)
+  %cmp = icmp eq i32 %arg0, 0
+  %select = select i1 %cmp, ptr addrspace(5) %alloca1, ptr addrspace(5) %alloca2
+  call void asm sideeffect "; use $0, $1","s,s"(ptr addrspace(5) %select, ptr addrspace(5) %alloca0)
+  ret void
+}
+
+; GCN-LABEL: {{^}}s_multiple_frame_indexes_imm_offsets:
+; GCN: s_load_dword [[ARG0:s[0-9]+]]
+; GCN: s_mov_b32 [[ALLOCA1:s[0-9]+]], 16
+; GCN: s_cmp_eq_u32 [[ARG0]], 0
+; GCN: s_cselect_b32 [[SELECT:s[0-9]+]], [[ALLOCA1]], 20
+; GCN: s_mov_b32 [[ALLOCA0:s[0-9]+]], 0{{$}}
+; GCN: ; use [[SELECT]], [[ALLOCA0]]
+define amdgpu_kernel void @s_multiple_frame_indexes_imm_offsets(i32 inreg %arg0) #0 {
+  %alloca0 = alloca [4 x i32], align 8, addrspace(5) ; 16 bytes, expected at offset 0
+  %alloca1 = alloca i32, align 4, addrspace(5) ; expected at offset 16 (inline constant)
+  %alloca2 = alloca i32, align 4, addrspace(5) ; expected at offset 20 (inline constant)
+  %cmp = icmp eq i32 %arg0, 0
+  %select = select i1 %cmp, ptr addrspace(5) %alloca1, ptr addrspace(5) %alloca2
+  call void asm sideeffect "; use $0, $1","s,s"(ptr addrspace(5) %select, ptr addrspace(5) %alloca0)
+  ret void
+}
+
+; GCN-LABEL: {{^}}v_multiple_frame_indexes_literal_offsets:
+; GCN: v_mov_b32_e32 [[ALLOCA2:v[0-9]+]], 0x48
+; GCN: v_mov_b32_e32 [[ALLOCA1:v[0-9]+]], 0x44
+; GCN: v_cmp_eq_u32_e32 vcc, 0, v0
+; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], [[ALLOCA2]], [[ALLOCA1]], vcc
+; GCN: v_mov_b32_e32 [[ALLOCA0:v[0-9]+]], 0{{$}}
+; GCN: ; use [[SELECT]], [[ALLOCA0]]
+define amdgpu_kernel void @v_multiple_frame_indexes_literal_offsets() #0 {
+  %vgpr = call i32 @llvm.amdgcn.workitem.id.x()
+  %alloca0 = alloca [17 x i32], align 8, addrspace(5) ; 68 bytes, expected at offset 0
+  %alloca1 = alloca i32, align 4, addrspace(5) ; expected at offset 0x44 (literal)
+  %alloca2 = alloca i32, align 4, addrspace(5) ; expected at offset 0x48 (literal)
+  %cmp = icmp eq i32 %vgpr, 0
+  %select = select i1 %cmp, ptr addrspace(5) %alloca1, ptr addrspace(5) %alloca2 ; v_cndmask takes the false value first
+  call void asm sideeffect "; use $0, $1","v,v"(ptr addrspace(5) %select, ptr addrspace(5) %alloca0)
+  ret void
+}
+
+; GCN-LABEL: {{^}}v_multiple_frame_indexes_one_imm_one_literal_offset:
+; GCN: v_mov_b32_e32 [[ALLOCA2:v[0-9]+]], 0x44
+; GCN: v_mov_b32_e32 [[ALLOCA1:v[0-9]+]], 64
+; GCN: v_cmp_eq_u32_e32 vcc, 0, v0
+; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], [[ALLOCA2]], [[ALLOCA1]], vcc
+; GCN: v_mov_b32_e32 [[ALLOCA0:v[0-9]+]], 0{{$}}
+; GCN: ; use [[SELECT]], [[ALLOCA0]]
+define amdgpu_kernel void @v_multiple_frame_indexes_one_imm_one_literal_offset() #0 {
+  %vgpr = call i32 @llvm.amdgcn.workitem.id.x()
+  %alloca0 = alloca [16 x i32], align 8, addrspace(5) ; 64 bytes, expected at offset 0
+  %alloca1 = alloca i32, align 4, addrspace(5) ; expected at offset 64 (inline constant)
+  %alloca2 = alloca i32, align 4, addrspace(5) ; expected at offset 0x44 (literal)
+  %cmp = icmp eq i32 %vgpr, 0
+  %select = select i1 %cmp, ptr addrspace(5) %alloca1, ptr addrspace(5) %alloca2 ; v_cndmask takes the false value first
+  call void asm sideeffect "; use $0, $1","v,v"(ptr addrspace(5) %select, ptr addrspace(5) %alloca0)
+  ret void
+}
+
+; GCN-LABEL: {{^}}v_multiple_frame_indexes_imm_offsets:
+; GCN: v_mov_b32_e32 [[ALLOCA2:v[0-9]+]], 12
+; GCN: v_mov_b32_e32 [[ALLOCA1:v[0-9]+]], 8
+; GCN: v_cmp_eq_u32_e32 vcc, 0, v0
+; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], [[ALLOCA2]], [[ALLOCA1]], vcc
+; GCN: v_mov_b32_e32 [[ALLOCA0:v[0-9]+]], 0{{$}}
+; GCN: ; use [[SELECT]], [[ALLOCA0]]
+define amdgpu_kernel void @v_multiple_frame_indexes_imm_offsets() #0 {
+  %vgpr = call i32 @llvm.amdgcn.workitem.id.x()
+  %alloca0 = alloca [2 x i32], align 8, addrspace(5) ; 8 bytes, expected at offset 0
+  %alloca1 = alloca i32, align 4, addrspace(5) ; expected at offset 8 (inline constant)
+  %alloca2 = alloca i32, align 4, addrspace(5) ; expected at offset 12 (inline constant)
+  %cmp = icmp eq i32 %vgpr, 0
+  %select = select i1 %cmp, ptr addrspace(5) %alloca1, ptr addrspace(5) %alloca2 ; v_cndmask takes the false value first
+  call void asm sideeffect "; use $0, $1","v,v"(ptr addrspace(5) %select, ptr addrspace(5) %alloca0)
+  ret void
+}
+
 attributes #0 = { nounwind }

@arsenm arsenm marked this pull request as ready for review May 20, 2025 18:46
Copy link
Contributor Author

arsenm commented May 20, 2025

Merge activity

  • May 20, 8:36 PM UTC: A user started a stack merge that includes this pull request via Graphite.
  • May 20, 8:37 PM UTC: @arsenm merged this pull request with Graphite.

@arsenm arsenm merged commit 5aa3171 into main May 20, 2025
8 of 10 checks passed
@arsenm arsenm deleted the users/arsenm/amdgpu/add-frame-index-elimination-lowering-test branch May 20, 2025 20:37
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment

Projects

None yet

Development

Successfully merging this pull request may close these issues.

4 participants